package com.javastudy.spider.basic;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

//http://www.cnblogs.com/TTyb/p/5784581.html
/**
 * Minimal jsoup scraping demo: downloads a page over HTTP and prints text
 * extracted from selected parts of the parsed document to standard output.
 */
public class SpiderTest1 {
    /**
     * Entry point; runs the oschina scraping demo against a fixed project URL.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Alternative demo: Get_Url("http://www.cnblogs.com/TTyb/");
        getOschina("https://zb.oschina.net/project/c8975f076d9b2a14");
    }

    /**
     * Fetches {@code url} with an HTTP GET and prints the text of every
     * {@code <a>} element found inside the element whose id is {@code content}.
     *
     * <p>I/O failures are reported via {@link Throwable#printStackTrace()} and
     * are not propagated. If the page has no element with id {@code content},
     * a message is written to standard error and nothing else is printed.
     *
     * @param url address of the page to download
     */
    public static void Get_Url(String url) {
        try {
            // The connection can be customised before get(), e.g. with
            // .data(...), .userAgent(...), .cookie(...), .timeout(...) or .post().
            Document doc = Jsoup.connect(url).get();

            // getElementById returns null when the id is absent from the page,
            // so it must be checked before it is dereferenced.
            Element content = doc.getElementById("content");
            if (content == null) {
                System.err.println("No element with id \"content\" found at " + url);
                return;
            }

            // All <a> elements nested inside the content element.
            // (doc.select("a[href]") would instead pick every link in the document.)
            Elements links = content.getElementsByTag("a");

            System.out.println("------------列出Link------------------");
            for (Element link : links) {
                // Visible text of the anchor; link.attr("href") would give its target.
                System.out.println(link.text());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches {@code url} with an HTTP GET and prints the text of each
     * {@code div.project-attr} element located under
     * {@code div.content.simditor} within the elements of class {@code detail},
     * each preceded by a separator line.
     *
     * <p>Any failure is reported via {@link Throwable#printStackTrace()} and is
     * not propagated.
     *
     * @param url address of the page to download
     */
    public static void getOschina(String url) {
        try {
            // The connection can be customised before get(), e.g. with
            // .data(...), .userAgent(...), .cookie(...), .timeout(...) or .post().
            Document doc = Jsoup.connect(url).get();

            // Narrow the document step by step down to the attribute blocks.
            Elements records = doc.getElementsByClass("detail")
                    .select("div.content.simditor")
                    .select("div.project-attr");

            for (Element record : records) {
                System.out.println("-----------------record-----------------");
                System.out.println(record.text());
            }
        } catch (Exception e) {
            // Still best-effort (nothing is rethrown), but the failure is now
            // reported instead of being silently discarded.
            e.printStackTrace();
        }
    }
}
